package com.heima.common.tess4j;

import lombok.Data;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.InputStream;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Spring-managed OCR client that runs Tess4J text recognition on an image stream.
 *
 * <p>The {@code datapath} and {@code language} fields are bound from configuration
 * properties under the {@code tess4j} prefix; Lombok's {@code @Data} generates the
 * accessors used for that binding.
 *
 * @author 陈辉
 * @date 2023 10:52
 */
@Component
@ConfigurationProperties(prefix = "tess4j")
@Data
public class TesseractClient {
    private static final Logger LOGGER = Logger.getLogger(TesseractClient.class.getName());

    /** Value passed to {@link ITesseract#setDatapath(String)} (the trained-data location). */
    private String datapath;
    /** Value passed to {@link ITesseract#setLanguage(String)} (the recognition language). */
    private String language;

    /**
     * Recognizes the text contained in the image read from {@code is}.
     *
     * <p>The recognized text is flattened: every carriage return and every line feed is
     * replaced with {@code "-"}, and every space character is removed.
     *
     * <p>This method does not close {@code is}; the caller remains responsible for it.
     *
     * @param is stream containing the encoded image (the original comment mentions images
     *           fetched from MinIO); may be {@code null}, which yields an empty result
     * @return the flattened recognition result, or an empty string when the stream is
     *         {@code null}, the image cannot be decoded, or recognition fails
     */
    public String doOCR(InputStream is) {
        if (is == null) {
            LOGGER.warning("OCR skipped: input stream is null");
            return "";
        }
        try {
            // ImageIO.read returns null (rather than throwing) when no registered
            // reader can decode the stream, so that case is checked explicitly.
            BufferedImage image = ImageIO.read(is);
            if (image == null) {
                LOGGER.warning("OCR skipped: input stream does not contain a decodable image");
                return "";
            }

            // A fresh engine per call, configured from the bound properties.
            ITesseract tesseract = new Tesseract();
            tesseract.setDatapath(datapath);
            tesseract.setLanguage(language);

            String result = tesseract.doOCR(image);
            // Collapse the output onto one line: CR/LF become "-", spaces are dropped.
            return result.replaceAll("\\r|\\n", "-").replace(" ", "");
        } catch (Exception e) {
            // Best-effort contract: callers receive "" on any failure, but the cause
            // is logged instead of being dumped to stderr.
            LOGGER.log(Level.WARNING, "OCR recognition failed", e);
            return "";
        }
    }
}
